library(tidyverse)
library(ggpubr)
library(patchwork)
library(stringr)

ward_02 <- read.csv('data/uk_2002_loc.csv')
ward_06 <- read.csv('data/borough-elections-turnout-2006.csv')
ward_10 <- read.csv('data/borough-election-turnout-2010.csv')
ward_14 <- read.csv('data/uk_2014_loc.csv')
ward_18 <- read.csv('data/uk_2018_loc.csv')

#### Cleaning ####

ward_02 <- ward_02 %>% 
  mutate(ward = str_squish(ward)) %>% 
  mutate(ward = str_replace_all(ward, fixed(' '), ''),
         ward = str_remove_all(ward, "[[:punct:]]"))


ward_06 <- ward_06 %>% 
  rename(ward = 'Ward',
         borough = 'Borough', 
         trnt = "Percentage.poll.2006") %>% 
  mutate(ward = str_squish(ward)) %>% 
  mutate(ward = str_replace_all(ward, fixed(' '), ''),
         ward = str_remove_all(ward, "[[:punct:]]")) %>%   
  mutate(ward = str_replace(ward, 'And', 'and'),
         ward = str_replace(ward, '&', 'and'),
         ward = tolower(ward)) %>% 
  select(2:4) %>% 
  group_by(borough, ward)

ward_10 <- ward_10 %>% 
  rename(ward = 'Ward',
         borough = 'Borough', 
         trnt = "Percentage.poll.2010") %>%
  mutate(ward = str_squish(ward)) %>% 
  mutate(ward = str_replace_all(ward, fixed(' '), ''),
         ward = str_remove_all(ward, "[[:punct:]]")) %>%   
  mutate(ward = tolower(ward),
         ward = str_replace(ward, 'And', 'and'),
         ward = str_replace(ward, '&', 'and')) %>% 
  select(2:4) %>% 
  group_by(borough, ward)

ward_l_14 <- ward_14 %>% 
  rename(ward_gsscode = "ward_gsscode") %>% 
  mutate(ward_name = tolower(ward_name),
         ward_name = str_replace(ward_name, 'And', 'and'),
         ward_name = str_replace(ward_name, '&', 'and'),
         ward_name = str_squish(ward_name),
         ward_name = str_replace_all(ward_name, fixed(' '), ''),
         ward_name = str_remove_all(ward_name, "[[:punct:]]")) %>% 
  select(1:3)

ward_l_18 <- ward_18 %>% 
  select(1, 2, 4) %>% 
  rename(ward_gsscode = "ONS.Ward.code",
         ward_name = "Ward", 
         borough_name = "Local.Authority") %>% 
  mutate(ward_name = str_squish(ward_name)) %>% 
  mutate(ward_name = str_replace_all(ward_name, fixed(' '), ''),
         ward_name = str_remove_all(ward_name, "[[:punct:]]")) %>%   
  mutate(ward_name = tolower(ward_name),
         ward_name = str_replace(ward_name, 'And', 'and'),
         ward_name = str_replace(ward_name, '&', 'and')) 


wards <- bind_rows(ward_l_14, ward_l_18)

wards <- unique(wards)

wards <- wards %>% 
  group_by(ward_gsscode)

wards$full <- paste(wards$borough_name, 
                       wards$ward_name)

wards <- wards %>% 
  select(1, 4)

ward_02 <- ward_02 %>% 
  mutate(ward = tolower(ward))

ward_02$full <- paste(ward_02$borough, ward_02$ward)

ward_02 <- left_join(ward_02, wards, by = 'full')

ward_02 <- ward_02 %>% 
  rename(ward_code = "ward_gsscode", 
         loc = "borough") %>% 
  filter(!duplicated(full))

ward_06$full <- paste(ward_06$borough, ward_06$ward)

ward_06 <- ward_06 %>% 
  mutate(trnt = trnt/100) %>% 
  select(3, 4)

ward_06 <- left_join(ward_06, wards, by = 'full')

ward_06 <- ward_06 %>% 
  drop_na() %>% 
  rename(ward_code = "ward_gsscode", 
         loc = "borough") %>%
  mutate(treat = 0, 
         year = 2006)

ward_10$full <- paste(ward_10$borough, ward_10$ward)

ward_10 <- ward_10 %>% 
  mutate(trnt = trnt/100) %>% 
  select(3, 4)

ward_10 <- left_join(ward_10, wards, by = 'full')

ward_10 <- ward_10 %>% 
  drop_na() %>% 
  rename(ward_code = "ward_gsscode", 
         loc = "borough") %>%
  mutate(treat = 0, 
         year = 2010)

ward_14 <- ward_14 %>% 
  rename(ward_code = "ward_gsscode",
         eli_voters = "Number.of.local.government.electors.entitled.to.vote.in.this.election",
         ballot_tot = "Number.of.ballot.papers",
         ballot_inp = "Ballot.papers.from.polling.stations", 
         ballot_post = "Ballot.papers.from.postal.votes", 
         trnt = "Percentage.poll",
         loc = "borough_name") %>% 
  mutate(ward_name = str_squish(ward_name)) %>% 
  mutate(ward_name = str_replace_all(ward_name, fixed(' '), ''),
         ward_name = str_remove_all(ward_name, "[[:punct:]]")) %>%   
  mutate(trnt = trnt/100,
         trnt_inp = ballot_inp/eli_voters,
         ward_name = tolower(ward_name),
         ward_name = str_replace(ward_name, 'And', 'and'),
         ward_name = str_replace(ward_name, '&', 'and'),
         full = paste(loc, ward_name)) %>% 
  select(c('ward_code', 'trnt', 'full', 'loc'))

ward_18 <- ward_18 %>% 
  rename(ward_code = "ONS.Ward.code", 
         eli_voters = "Electorate",
         trnt = "Ballot.box.turnout..incl..votes.rejected.at.count.....",
         ballot_post = "Number.of.postal.votes.included.in.the.count",
         ballot_tot = "Total.ballots.at.the.count",
         loc = "Local.Authority" ) %>% 
  mutate(Ward = str_squish(Ward)) %>% 
  mutate(Ward = str_replace_all(Ward, fixed(' '), ''),
         Ward = str_remove_all(Ward, "[[:punct:]]")) %>% 
  mutate(ballot_inp = ballot_tot - ballot_post,
         trnt_inp = ballot_inp/eli_voters,
         treat = ifelse(loc == "Bromley", 1, 0),
         Ward = tolower(Ward),
         Ward = str_replace(Ward, 'And', 'and'),
         Ward = str_replace(Ward, '&', 'and'),
         full = paste(loc, Ward)) %>% 
  select(c('ward_code', 'trnt', 'full', 'loc', 'treat'))

ward_02$year <- 2002
ward_02$treat <- 0

ward_14$year <- 2014 
ward_14$treat <- 0

ward_18$year <- 2018 


ward_14_18 <- bind_rows(ward_14, ward_18, ward_10, ward_06, ward_02)

ward_14_18_tab <- ward_14_18 %>% 
  group_by(ward_code) %>% 
  summarise(n()) %>% 
  rename(count = "n()") %>% 
  filter(count >= 1)

ward_full <- right_join(ward_14_18, ward_14_18_tab, by = 'ward_code')

ward_full <- ward_full %>% 
  group_by(ward_code) %>% 
  summarise(count = n(), across()) %>% 
  ungroup()

write.csv(ward_full, 'data/ward_full.csv')
